library(tidyverse)
library(plotly)

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
load("wireless.rda")
wireless_feature = wireless[,3:7] %>%
  mutate(d_S1 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - as.numeric(AP[1,]))^2))})) %>%
  mutate(d_S2 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - as.numeric(AP[2,]))^2))})) %>%
  mutate(d_S3 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - as.numeric(AP[3,]))^2))})) %>%
  mutate(d_S4 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - as.numeric(AP[4,]))^2))})) %>%
  mutate(d_S5 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - as.numeric(AP[5,]))^2))}))
plot(x=wireless$x, y=wireless$y)
points(x=AP$x,y=AP$y, col="red", cex=1)
# exploring relationship between distance and signal strength
# without the loss of generality, use X
ap1 = as.numeric(AP[1,])
distances = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - ap1)^2))})
par(mfrow=c(2,2))
plot(y=(distances[distances < cutoff])^2, x=-wireless$S1[distances < cutoff])
plot(x=log(-wireless$S1[distances < cutoff]), y=2*log(distances[distances < cutoff]))
plot(y=distances[distances < cutoff], x=-wireless$S1[distances < cutoff])
plot(x=log(-wireless$S1[distances < cutoff]), y=log(distances[distances < cutoff]))

mod = lm(wireless$S2~log(distances2))
summary(mod)

TODO: repeat this distance-vs-signal analysis for S2, S3, … as well.

# exploring relationship between distance and signal strength
# without the loss of generality, use X
ap2 = as.numeric(AP[2,])
distances2 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - ap2)^2))})
par(mfrow=c(1,2))
plot(y=distances2, x=wireless$S2)
plot(x=distances2, y=-wireless$S2)

cutoff = 100
sample_size = 40
num_iter = 1000
k_s = numeric(num_iter)
r_s = numeric(num_iter)
for (i in 1:num_iter) {
  temp_indices = sample((1:254)[distances < cutoff], sample_size)
  temp_mod = lm(wireless_feature[temp_indices ,1]~distances[temp_indices])
  k_s[i] = temp_mod$coefficients[2]
  r_s[i] = summary(temp_mod)$adj.r.squared
}
mean(k_s)
[1] -0.421104
mean(r_s)
[1] 0.8751914
# using bagging to find the coefficients, using 40 points 
k_s = numeric(num_iter)
for (i in 1:num_iter) {
  temp_indices = sample((1:254)[distances2 < cutoff], sample_size)
  temp_mod = lm(wireless_feature[temp_indices ,2]~distances2[temp_indices])
  k_s[i] = temp_mod$coefficients[2]
}
mean(k_s)
[1] -0.4250205
plot(x=(distances2)[distances2 < cutoff], y=-wireless$S2[distances2 < cutoff])
#for distance above 100, the linear relationship between signal and distance breaks
#lots of points have -92 the worse signal ever 
k = -0.42
d = apply(AP, 1, function(x){sqrt(sum((x - wireless[107,1:2])^2))})
dxy = t(apply(AP, 1, function(x) {as.numeric(wireless[107,1:2]) - x}))
ds.dxy = apply(dxy, 2, function(x) as.numeric(k * x / d))
ds.dxy
              x           y
[1,] -0.1784933 -0.38018434
[2,]  0.3837247 -0.17074941
[3,]  0.4188134  0.03154903
[4,] -0.4035757  0.11630423
[5,]  0.3511401 -0.23043571
ds = wireless_feature[86,] - wireless_feature[107,] 
k = mod1$coefficients[2]
b = mod1$coefficients[1]
signals = wireless_feature[224,]
test = t(apply(AP, 1, function(x) {as.numeric(wireless[224,1:2]) - x}))
test2 = apply(test, 2, function(x) as.numeric(k^2/(signals - b)) * x)
df_mod = lm(as.numeric(wireless_feature[223,] - wireless_feature[224,])~0 + 
                         test2[,1] + test2[,2])
summary(df_mod)

Call:
lm(formula = as.numeric(wireless_feature[223, ] - wireless_feature[224, 
    ]) ~ 0 + test2[, 1] + test2[, 2])

Residuals:
      1       2       3       4       5 
 0.2441  5.0087  1.8471 -0.5603  2.8118 

Coefficients:
           Estimate Std. Error t value Pr(>|t|)
test2[, 1]   -1.643      2.177  -0.755    0.505
test2[, 2]   -2.593      5.225  -0.496    0.654

Residual standard error: 3.501 on 3 degrees of freedom
Multiple R-squared:  0.1827,    Adjusted R-squared:  -0.3622 
F-statistic: 0.3353 on 2 and 3 DF,  p-value: 0.7389

##exploring using differentials 
##

sample_index = sample(1:nrow(wireless), 1)
sample_point = wireless[sample_index,]
sample_diff = data.frame(t(apply(wireless[-sample_index,], 1, function(x) x - as.numeric(wireless[sample_index,]))))
sample_diff_y = sample_diff[sample_diff$y == 0,]

mod = lm(x~.-y, data=sample_diff_y)
summary(mod)
basic_x = lm(x~.-y, data=wireless)
basic_y = lm(y~.-x, data=wireless)

#summary(basic_x)
#summary(basic_y)

avg_error = mean(sqrt((wireless$x - basic_x$fitted.values)^2 + (wireless$y - basic_y$fitted.values)^2))
avg_error
plot(wireless$x, wireless$y)
points(basic_x$fitted.values, basic_y$fitted.values, col="red")
segments(wireless$x, wireless$y, basic_x$fitted.values, basic_y$fitted.values, col="blue")
n = nrow(wireless)
#train_percent = 0.6
#sample_indices = sample(1:nrow(wireless), train_percent*n)
knn_predictions = numeric(n)
pwdistances = as.matrix(dist(wireless[,3:7]))
for (i in 1:n) {
  knn_predictions[i] = (1:n)[-i][which.min(as.matrix(pdist::pdist(1/wireless[,(3:7)][i,], 1/wireless[,(3:7)][-i,])))]
}
knn_x = wireless$x[knn_predictions]
knn_y = wireless$y[knn_predictions]
par(mfrow=c(1,2))
plot(density(sqrt((wireless$x - knn_x)^2 + (wireless$y - knn_y)^2)),
     main = "knn performance")

#plot(density(sqrt((wireless$x - basic_x$fitted.values)^2 + (wireless$y - basic_y$fitted.values)^2)),
#     main = "regression performance")
knn_errors = sqrt((wireless$x - knn_x)^2 + (wireless$y - knn_y)^2)
knn_avg_error = mean(knn_errors[knn_errors < 100])
knn_avg_error
[1] 11.845
plot(wireless$x, wireless$y)
points(knn_x, knn_y, col="red")
segments(wireless$x, wireless$y, knn_x, knn_y, col="blue")

knn_bad_loc = wireless[knn_errors > 20,] %>%
  mutate(error = knn_errors[knn_errors > 20]) %>%
  mutate(index = (1:254)[knn_errors > 20]) %>%
  dplyr::arrange(desc(error))
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[knn_errors > 20], y=wireless$y[knn_errors > 20], name = "badloc", mode="markers", text=hover_text[knn_errors > 20]) 
p
#trying nearest neightbor + trigulation
point_index = 4
point = wireless[point_index,3:7]
point_d = pdist::pdist(point, wireless[-point_index,3:7])@dist
top_three = (1:254)[-point_index][order(point_d)[1:3]]
top_three
[1] 156 157 126
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[top_three], y=wireless$y[top_three], 
            name = "neighbors", mode="markers", 
            text=paste(top_three, "<br>", hover_text[top_three])) %>%
  add_trace(x=wireless$x[point_index], y=wireless$y[point_index], 
            name = "neighbors", mode="markers", 
            text=paste(point_index, "<br>", hover_text[point_index]))
p
point_index = 113
point = wireless[point_index,3:7]
point_d = pdist::pdist(point, wireless[-point_index,(3:7)])@dist
top_three = (1:254)[-point_index][order(point_d)[1:3]]
neighbors = numeric(5)
for (i in 1:5) {
  neighbors[i] = (1:n)[-point_index][which.min(as.matrix(pdist::pdist(wireless[,(3:7)[-i]][point_index,], wireless[,(3:7)[-i]][-point_index,])))]
}
neighbors
[1]  33 153   9  33   4
dist(wireless[neighbors,1:2])
           33      153        9     33.1
153  38.47077                           
9    54.00000 79.62412                  
33.1  0.00000 38.47077 54.00000         
4    30.30000 59.06683 23.70000 30.30000

5

point
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[neighbors], y=wireless$y[neighbors], 
            name = "neighbors", mode="markers", 
            text=paste(neighbors, "<br>", hover_text[neighbors])) #%>%
  #add_trace(x=wireless$x[point_index], y=wireless$y[point_index], 
  #          name = "point", mode="markers", 
  #          text=paste(point_index, "<br>", hover_text[point_index]))
  
p
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[top_three], y=wireless$y[top_three], 
            name = "neighbors", mode="markers", 
            text=paste(top_three, "<br>", hover_text[top_three])) %>%
  add_trace(x=wireless$x[point_index], y=wireless$y[point_index], 
            name = "point", mode="markers", 
            text=paste(point_index, "<br>", hover_text[point_index]))
  
p
knn_predictions[3]
[1] 37
neighbors = numeric(5)
for (i in 1:5) {
  neighbors[i] = (1:n)[-point_index][which.min(as.matrix(pdist::pdist(1/wireless[,(3:7)[-i]][point_index,], 1/wireless[,(3:7)[-i]][-point_index,])))]
}
neighbors
[1] 244 244 244 244 233

After analyzing the errors, I find that point 243's signal for AP3 is far off compared to its neighbors; let's check the other access points.

Some points are only wrong on the signal from a single access point.

set.seed(12345)
sample_indices = sample(1:254, 50)
plot(wireless$x[sample_indices], wireless$y[sample_indices], ylim=c(0, 145), xlim=c(10, 235))
points(knn_x[sample_indices], knn_y[sample_indices], col="yellow")
points((knn_x-df_x)[sample_indices], (knn_y-df_y)[sample_indices], col="red")

segments(wireless$x[sample_indices], wireless$y[sample_indices], knn_x[sample_indices], knn_y[sample_indices], col="blue")
segments(knn_x[sample_indices], knn_y[sample_indices], (knn_x-df_x)[sample_indices], (knn_y-df_y)[sample_indices], col="green")
#segments(wireless$x, wireless$y, knn_x+df_x, knn_y+ df_y, col="green")
par(mfrow=c(1,2))
plot(wireless_feature$S1, wireless_feature$d1, main="ap1")
plot(wireless_feature$S2, wireless_feature$d2, main="ap2")

plot(wireless_feature$S3, wireless_feature$d3, main="ap3")
plot(wireless_feature$S4, wireless_feature$d4, main="ap4")

plot(wireless_feature$S5, wireless_feature$d5, main="ap5")

From the first half of the graphs, we can see that the variance spikes at different points for different APs.

Looking at the second half of the graphs:
for access point 5, the relationship between distance and signal is very weak, while
the others are more stable. This may be because AP5 is in the center of the building.

hover_text = apply(wireless_feature,1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP))
p
kclusters = kmeans(wireless[,3:7], 5)
#kclusters$cluster

ggplot(data=wireless) +
  geom_point(aes(x=x,y=y), colour=kclusters$cluster) 
ggplot(data=wireless) +
  geom_point(aes(x=x,y=y)) +
  scale_fill_manual(kclusters$cluster)
cutoff = 68
# seems like 70 is a good cut off lets check how many points have more than 70
wireless_strong = wireless %>%
  mutate(S1 = S1 > -cutoff) %>% 
  mutate(S2 = S2 > -cutoff) %>% 
  mutate(S3 = S3 > -cutoff) %>% 
  mutate(S4 = S4 > -cutoff) %>% 
  mutate(S5 = S5 > -cutoff) 
# seems like 70 is not a good cutoff as we think
table(apply(wireless_strong[,3:7], 1, sum))

  0   1   2   3 
 19 150  80   5 
bad_locations = wireless_strong[as.numeric(apply(wireless_strong[,3:7], 1, sum)) < 2,]

plot(x=bad_locations$x, y=bad_locations$y, ylim=c(0,150), xlim=c(0,230))
points(x=AP$x,y=AP$y, col="red", cex=5)
View(data.frame(table(wireless$y)))
---
title: "R Notebook"
output: html_notebook
---

```{r library}
library(tidyverse)
library(plotly)
```


```{r import_data}
# Load the survey data. The rest of the notebook uses two objects after this
# call: `wireless` (one row per receiver; columns 1:2 are used as coordinates
# and columns 3:7 as signals) and `AP` (one coordinate row per access point).
load("wireless.rda")

# Feature table: the signal columns plus d1..d5, the Euclidean distance from
# every receiver to access points 1..5.
receiver_xy = as.matrix(wireless[,1:2])
wireless_feature = wireless[,3:7]
for (ap in 1:5) {
  ap_xy = as.numeric(AP[ap,])
  wireless_feature[[paste0("d", ap)]] = sqrt(rowSums(sweep(receiver_xy, 2, ap_xy)^2))
}
```

```{r seeing_places}
# Site map: receiver positions as the base scatter, access points overlaid in red.
plot(wireless$x, wireless$y)
points(AP$x, AP$y, col="red", cex=1)
```

```{r}
# Explore how signal strength S1 relates to the distance from access point 1
# (AP1 serves as the representative case here).
ap1 = as.numeric(AP[1,])
distances = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - ap1)^2))})

# Distance threshold for the receivers shown below. It has to be defined
# before the plots use it; the bag_init chunk sets the same value again.
cutoff = 100
near = distances < cutoff
neg_s1 = -wireless$S1[near]
near_d = distances[near]

par(mfrow=c(2,2))
# Four candidate functional forms of distance against (negated) signal.
plot(x=neg_s1, y=near_d^2, xlab="-S1", ylab="distance^2")

plot(x=log(neg_s1), y=2*log(near_d), xlab="log(-S1)", ylab="2*log(distance)")

plot(x=neg_s1, y=near_d, xlab="-S1", ylab="distance")

plot(x=log(neg_s1), y=log(near_d), xlab="log(-S1)", ylab="log(distance)")
```


```{r}
# Distances from every receiver to access point 2. They are computed here
# because this chunk runs before the chunk that otherwise defines `distances2`.
ap2 = as.numeric(AP[2,])
distances2 = apply(wireless[,1:2], 1, function(x){sqrt(sum((x - ap2)^2))})

# Regress the AP2 signal on log-distance.
mod = lm(wireless$S2~log(distances2))
summary(mod)
```

TODO:
repeat this distance-vs-signal analysis for S2, S3, ... as well.

```{r}
# Relationship between distance and signal strength for access point 2,
# followed by the same two views for access point 1 (`distances`).
ap2 = as.numeric(AP[2,])
receiver_xy = as.matrix(wireless[,1:2])
distances2 = sqrt(rowSums(sweep(receiver_xy, 2, ap2)^2))

par(mfrow=c(1,2))

# AP2: distance against signal, then negated signal against distance
plot(wireless$S2, distances2)
plot(distances2, -wireless$S2)

# AP1: same pair of views
plot(wireless$S1, distances)
plot(distances, -wireless$S1)
```

```{r bag_init}
# Settings shared by the resampling chunks below.
# cutoff: only receivers with distance < cutoff to the access point are sampled
cutoff = 100
# sample_size: number of receivers drawn per iteration
sample_size = 40
# num_iter: number of resampling iterations
num_iter = 1000
```


```{r bag1}
# Repeatedly fit S1 ~ distance on random subsets of the receivers that lie
# within `cutoff` of AP1, then average the slope and the adjusted R^2.
k_s = numeric(num_iter)
r_s = numeric(num_iter)

# Row indices of the eligible receivers. which() avoids hard-coding the number
# of rows and is evaluated once instead of on every iteration.
near_idx = which(distances < cutoff)

for (i in seq_len(num_iter)) {
  # sample() without `replace` draws sample_size distinct receivers
  temp_indices = sample(near_idx, sample_size)
  temp_mod = lm(wireless_feature[temp_indices ,1]~distances[temp_indices])
  k_s[i] = temp_mod$coefficients[2]  # slope of signal on distance
  r_s[i] = summary(temp_mod)$adj.r.squared
}

mean(k_s)
mean(r_s)
```

```{r bag2}
# Same resampling estimate for AP2: average the slope of S2 ~ distance over
# num_iter random subsets of sample_size receivers within `cutoff` of AP2.
k_s = numeric(num_iter)

# Row indices of the eligible receivers. which() avoids hard-coding the number
# of rows and is evaluated once instead of on every iteration.
near_idx2 = which(distances2 < cutoff)

for (i in seq_len(num_iter)) {
  # sample() without `replace` draws sample_size distinct receivers
  temp_indices = sample(near_idx2, sample_size)
  temp_mod = lm(wireless_feature[temp_indices ,2]~distances2[temp_indices])
  k_s[i] = temp_mod$coefficients[2]  # slope of signal on distance
}

mean(k_s)
```

```{r bag5}

```



```{r}
# Negated AP2 signal against distance, restricted to receivers closer than `cutoff`.
plot(x=(distances2)[distances2 < cutoff], y=-wireless$S2[distances2 < cutoff])
# For distances above 100 the linear relationship between signal and distance breaks down:
# many points sit at -92, the weakest signal observed.
```

```{r}
# Linear signal ~ distance fits on the receivers within `cutoff` of AP2 (mod2)
# and of AP1 (mod1).
mod2 = lm(wireless$S2[distances2 < cutoff]~distances2[distances2 < cutoff])
mod1 = lm(wireless$S1[distances < cutoff]~distances[distances < cutoff])

# Print receivers 107 and 86, the pair compared in the next chunk.
wireless[c(107, 86),]
```

```{r}
# Fixed slope of the linear signal ~ distance model.
k = -0.42

# Reference receiver (row 107): its offset from, and distance to, each access point.
ref_xy = as.numeric(wireless[107,1:2])

dxy = t(apply(AP, 1, function(ap_xy) ref_xy - ap_xy))

d = apply(AP, 1, function(ap_xy) sqrt(sum((ap_xy - ref_xy)^2)))

# k * offset / distance, one row per access point and one column per coordinate.
ds.dxy = apply(dxy, 2, function(offset) as.numeric(k * offset / d))

ds.dxy

# Feature difference between receivers 86 and 107.
ds = wireless_feature[86,] - wireless_feature[107,]
```


```{r}
# Regress the signal difference between receivers 223 and 224 on the
# per-access-point signal gradient at receiver 224 (no intercept).
# Slope and intercept come from the AP1 fit and are applied to every access point.
k = mod1$coefficients[2]
b = mod1$coefficients[1]

# Use only the five signal columns. wireless_feature also carries d1..d5, which
# would otherwise be recycled into the gradient and into the response.
signal_cols = 1:5
signals = wireless_feature[224, signal_cols]
signal_diff = as.numeric(wireless_feature[223, signal_cols] - wireless_feature[224, signal_cols])

# Offset of receiver 224 from each access point (one row per access point).
test = t(apply(AP, 1, function(x) {as.numeric(wireless[224,1:2]) - x}))

# Under S = b + k*d the gradient is k*offset/d, and d = (S - b)/k, giving
# k^2 * offset / (S - b).
test2 = apply(test, 2, function(x) as.numeric(k^2/(signals - b)) * x)

df_mod = lm(signal_diff~0 +
                         test2[,1] + test2[,2])

summary(df_mod)


```

```{r}
# 12/31/2017 differential attempts:
# regress the log ratio of distances to two access points on the difference of
# their signals (no intercept).
diff_12 = (wireless_feature$S1 - wireless_feature$S2)
# NOTE(review): despite its name, diff_32 is built from S4 and S5 and is paired
# with d4/d5 below; confirm whether the name or the columns are intended.
diff_32 = (wireless_feature$S4 - wireless_feature$S5)
summary(lm(log(wireless_feature$d1/wireless_feature$d2)~0 + diff_12))

summary(lm(log(wireless_feature$d4/wireless_feature$d5)~0 + diff_32))

# Scatter of the AP1/AP2 pair used in the first regression.
plot((wireless_feature$S1 - wireless_feature$S2), log(wireless_feature$d1/wireless_feature$d2))
```


```{r}
## Exploring differentials: pick one random receiver as the reference and
## express every other receiver as its difference from that reference.

sample_index = sample(1:nrow(wireless), 1)
sample_point = wireless[sample_index,]
reference_row = as.numeric(sample_point)
other_rows = as.matrix(wireless[-sample_index,])
sample_diff = data.frame(sweep(other_rows, 2, reference_row))
# keep the receivers whose y equals the reference's y
sample_diff_y = sample_diff[sample_diff$y == 0,]

# x offset explained by every remaining column except y
mod = lm(x~.-y, data=sample_diff_y)
summary(mod)
```

```{r}
# Baseline: two linear regressions, one per coordinate, each on every other
# column of `wireless` except the opposite coordinate.
basic_x = lm(x~.-y, data=wireless)
basic_y = lm(y~.-x, data=wireless)

#summary(basic_x)
#summary(basic_y)

# Mean Euclidean distance between true and fitted positions (in-sample).
err_x = wireless$x - basic_x$fitted.values
err_y = wireless$y - basic_y$fitted.values
avg_error = mean(sqrt(err_x^2 + err_y^2))
avg_error
```

```{r basic_prediction}
# True positions, fitted positions in red, and a blue segment joining each pair.
fit_x = basic_x$fitted.values
fit_y = basic_y$fitted.values
plot(wireless$x, wireless$y)
points(fit_x, fit_y, col="red")
segments(x0=wireless$x, y0=wireless$y, x1=fit_x, y1=fit_y, col="blue")
```

```{r basic_knn}
# Leave-one-out nearest neighbour: for every receiver, find the other receiver
# whose inverse signal vector is closest, and store that receiver's row index.
n = nrow(wireless)
#train_percent = 0.6
#sample_indices = sample(1:nrow(wireless), train_percent*n)
knn_predictions = numeric(n)

# Pairwise distances on the raw signals (not used by the loop below).
pwdistances = as.matrix(dist(wireless[,3:7]))

# The inverse signals do not change between iterations, so compute them once.
inv_signals = 1/wireless[,(3:7)]

for (i in seq_len(n)) {
  # (1:n)[-i] maps the position among "all rows but i" back to a row index
  knn_predictions[i] = (1:n)[-i][which.min(as.matrix(pdist::pdist(inv_signals[i,], inv_signals[-i,])))]
}
```


```{r}
# Predicted position of a receiver = position of its nearest neighbour.
knn_x = wireless$x[knn_predictions]
knn_y = wireless$y[knn_predictions]

# Euclidean distance between the true and the predicted position.
knn_errors = sqrt((wireless$x - knn_x)^2 + (wireless$y - knn_y)^2)

par(mfrow=c(1,2))
plot(density(knn_errors),
     main = "knn performance")
#plot(density(sqrt((wireless$x - basic_x$fitted.values)^2 + (wireless$y - basic_y$fitted.values)^2)),
#     main = "regression performance")

# Average over the receivers whose error is below 100; larger errors are left
# out of this mean.
knn_avg_error = mean(knn_errors[knn_errors < 100])
knn_avg_error
```


```{r}
# True positions, nearest-neighbour predictions in red, and a blue segment
# joining each receiver to its prediction.
plot(wireless$x, wireless$y)
points(knn_x, knn_y, col="red")
segments(x0=wireless$x, y0=wireless$y, x1=knn_x, y1=knn_y, col="blue")
```

```{r knn_error_analysis}
# Receivers whose nearest-neighbour error exceeds 20, with the error and the
# original row index attached, worst first.
# which() gives the row indices directly, so the row count is not hard-coded
# and the threshold test is evaluated once.
bad_idx = which(knn_errors > 20)
knn_bad_loc = wireless[bad_idx,] %>%
  mutate(error = knn_errors[bad_idx]) %>%
  mutate(index = bad_idx) %>%
  dplyr::arrange(desc(error))
```

```{r}
# Interactive map: all receivers, the access points, and the receivers whose
# nearest-neighbour error exceeds 20.
# Hover label per receiver: its five signal values joined by "|".
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
# Row indices derived from the data instead of a hard-coded 1:254.
receiver_ids = seq_len(nrow(wireless))
bad = knn_errors > 20
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter",
            mode="markers", text=paste(receiver_ids, "<br>", hover_text)) %>%
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  # include the receiver index in the hover text, as the other receiver traces do
  add_trace(x=wireless$x[bad], y=wireless$y[bad], name = "badloc", mode="markers",
            text=paste(receiver_ids[bad], "<br>", hover_text[bad]))
p
```

```{r}
# Trying nearest neighbor + triangulation:
# the three receivers closest to receiver 4 in signal space.
point_index = 4
point = wireless[point_index,3:7]
point_d = pdist::pdist(point, wireless[-point_index,3:7])@dist
# row indices of the candidates, in the same order as point_d
candidates = (1:254)[-point_index]
nearest = order(point_d)[1:3]
top_three = candidates[nearest]
top_three
```

```{r visualizing}
# Interactive map of the query receiver and its three nearest neighbours in
# signal space, on top of all receivers and the access points.
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter",
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>%
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[top_three], y=wireless$y[top_three],
            name = "neighbors", mode="markers",
            text=paste(top_three, "<br>", hover_text[top_three])) %>%
  # the query receiver gets its own legend entry so it can be told apart from
  # its neighbours
  add_trace(x=wireless$x[point_index], y=wireless$y[point_index],
            name = "point", mode="markers",
            text=paste(point_index, "<br>", hover_text[point_index]))
p
```

```{r}
# Receiver 113: its three nearest neighbours on all five signals, then its
# single nearest neighbour when each signal column is left out in turn.
point_index = 113
point = wireless[point_index,3:7]
point_d = pdist::pdist(point, wireless[-point_index,(3:7)])@dist
candidates = (1:254)[-point_index]
top_three = candidates[order(point_d)[1:3]]


neighbors = numeric(5)
others = (1:n)[-point_index]
for (i in 1:5) {
  # signals with the i-th signal column dropped
  kept_signals = wireless[,(3:7)[-i]]
  gap = pdist::pdist(kept_signals[point_index,], kept_signals[-point_index,])
  neighbors[i] = others[which.min(as.matrix(gap))]
}

neighbors
# pairwise physical distances between the selected neighbours
dist(wireless[neighbors,1:2])
```
5
```{r}
# Print the signal values of the receiver selected above.
point
```


```{r}
# Interactive map of the receivers stored in `neighbors`, on top of all
# receivers and the access points. The trace for the query receiver itself is
# commented out below.
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter", 
            mode="markers", text=paste(1:254, "<br>", hover_text)) %>% 
  add_trace(x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP)) %>%
  add_trace(x=wireless$x[neighbors], y=wireless$y[neighbors], 
            name = "neighbors", mode="markers", 
            text=paste(neighbors, "<br>", hover_text[neighbors])) #%>%
  #add_trace(x=wireless$x[point_index], y=wireless$y[point_index], 
  #          name = "point", mode="markers", 
  #          text=paste(point_index, "<br>", hover_text[point_index]))
  
p
```

```{r}
# Interactive map of the query receiver ("point") and its three nearest
# neighbours, on top of all receivers and the access points.
hover_text = apply(wireless_feature[,1:5],1, function(x) paste(x,collapse = "|"))
# hover label: row index, a line break, then the signal values
receiver_label = function(idx) paste(idx, "<br>", hover_text[idx])

p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter",
            mode="markers", text=receiver_label(1:254))
p = add_trace(p, x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP))
p = add_trace(p, x=wireless$x[top_three], y=wireless$y[top_three],
              name = "neighbors", mode="markers",
              text=receiver_label(top_three))
p = add_trace(p, x=wireless$x[point_index], y=wireless$y[point_index],
              name = "point", mode="markers",
              text=receiver_label(point_index))

p
```


```{r}
# Row index of the nearest neighbour found for receiver 3.
knn_predictions[3]
```

```{r}
# Nearest neighbour of `point_index` on inverse signals, leaving out each of
# the five signal columns in turn.
neighbors = numeric(5)
others = (1:n)[-point_index]
for (i in 1:5) {
  # inverse signals with the i-th signal column dropped
  inv_kept = 1/wireless[,(3:7)[-i]]
  gap = pdist::pdist(inv_kept[point_index,], inv_kept[-point_index,])
  neighbors[i] = others[which.min(as.matrix(gap))]
}
neighbors
```


After analyzing the errors, I find that point 243's signal for AP3 is far off
compared to its neighbors; let's check the other access points.

Some points are only wrong on the signal from a single access point.

```{r}
# Plot a fixed random sample of 50 receivers: true positions, nearest-neighbour
# predictions (yellow), and the predictions shifted by (df_x, df_y) (red).
# NOTE(review): df_x and df_y are not defined in any chunk visible here;
# presumably they are a per-receiver correction computed elsewhere. Confirm
# before running this chunk.
set.seed(12345)
sample_indices = sample(1:254, 50)
plot(wireless$x[sample_indices], wireless$y[sample_indices], ylim=c(0, 145), xlim=c(10, 235))
points(knn_x[sample_indices], knn_y[sample_indices], col="yellow")
points((knn_x-df_x)[sample_indices], (knn_y-df_y)[sample_indices], col="red")

# blue: true position -> knn prediction; green: knn prediction -> shifted prediction
segments(wireless$x[sample_indices], wireless$y[sample_indices], knn_x[sample_indices], knn_y[sample_indices], col="blue")
segments(knn_x[sample_indices], knn_y[sample_indices], (knn_x-df_x)[sample_indices], (knn_y-df_y)[sample_indices], col="green")
#segments(wireless$x, wireless$y, knn_x+df_x, knn_y+ df_y, col="green")
```


```{r signal_and_distances}
# Two panels per figure.
par(mfrow=c(1,2))
# First half: signal on the x axis, distance to the matching access point on the y axis.
plot(wireless_feature$S1, wireless_feature$d1, main="ap1")
plot(wireless_feature$S2, wireless_feature$d2, main="ap2")
plot(wireless_feature$S3, wireless_feature$d3, main="ap3")
plot(wireless_feature$S4, wireless_feature$d4, main="ap4")
plot(wireless_feature$S5, wireless_feature$d5, main="ap5")
# Idea: maybe use the normal method for signal < 70
# and model log distance when signal > 70.

# Second half: the same pairs with the axes swapped (distance on x, signal on y).
plot(y=wireless_feature$S1, wireless_feature$d1, main="ap1")
plot(y=wireless_feature$S2, wireless_feature$d2, main="ap2")
plot(y=wireless_feature$S3, wireless_feature$d3, main="ap3")
plot(y=wireless_feature$S4, wireless_feature$d4, main="ap4")
plot(y=wireless_feature$S5, wireless_feature$d5, main="ap5")
```

From the first half of the graphs, we can see that the variance spikes at
different points for different APs.

Looking at the second half of the graphs:  
for access point 5, the relationship between distance and signal is very weak, while  
the others are more stable. This may be because AP5 is in the center of the building.


```{r plotly}
# Interactive overview map of all receivers and the access points.
# Hover label per receiver: every column of wireless_feature joined by "|".
hover_text = apply(wireless_feature,1, function(row) paste(row,collapse = "|"))
receiver_text = paste(1:254, "<br>", hover_text)
p = plot_ly(wireless, x=~x, y=~y, name = "receivers", type="scatter",
            mode="markers", text=receiver_text)
p = add_trace(p, x=AP$x, y=AP$y, name = "wifi post", mode="markers", text=rownames(AP))

p
```


```{r}
# Cluster the receivers into five groups by their signal columns and draw the
# map with each point coloured by its cluster number.
kclusters = kmeans(wireless[,3:7], centers=5)
#kclusters$cluster

ggplot(data=wireless, mapping=aes(x=x,y=y)) +
  geom_point(colour=kclusters$cluster)
```


```{r}
# Receiver map coloured by k-means cluster, with a legend.
# The cluster id is mapped to the colour aesthetic as a factor so that ggplot
# assigns one discrete colour per cluster; a fill scale without a mapped
# aesthetic would leave every point black.
ggplot(data=wireless) +
  geom_point(aes(x=x, y=y, colour=factor(kclusters$cluster))) +
  labs(colour="cluster")

```


```{r}
# Signal-strength threshold: a signal counts as "strong" when it is above
# -signal_cutoff. It has its own name so that it does not overwrite `cutoff`,
# which the earlier chunks use as a distance threshold.
signal_cutoff = 68
# Flag, per receiver and access point, whether the signal is strong.
wireless_strong = wireless %>%
  mutate(S1 = S1 > -signal_cutoff,
         S2 = S2 > -signal_cutoff,
         S3 = S3 > -signal_cutoff,
         S4 = S4 > -signal_cutoff,
         S5 = S5 > -signal_cutoff)

# Number of receivers by how many access points they hear strongly; the
# threshold turns out to be less useful than expected.
table(rowSums(wireless_strong[,3:7]))
```

```{r}
# Receivers that hear fewer than two access points strongly, drawn together
# with the access points (large red circles).
strong_count = rowSums(wireless_strong[,3:7])
bad_locations = wireless_strong[strong_count < 2,]

plot(x=bad_locations$x, y=bad_locations$y, ylim=c(0,150), xlim=c(0,230))
points(AP$x, AP$y, col="red", cex=5)
```


```{r}
# Frequency table of the receivers' y coordinates, opened in the data viewer.
# NOTE(review): View() presumably needs an interactive session; confirm this
# chunk is not meant to be knitted.
View(data.frame(table(wireless$y)))

```

